6. Differential abundance
1 INFO
This Rmarkdown document contains code for analysis of differential abundances from microbiome data provided from output from the script 1_Import.Rmd based on output from the DF_GMH_PIPELINE. For analyses of differential abundance we will use the DAtest package (Russel et al., 2018).
2 SETUP
knitr::opts_chunk$set(echo = TRUE)
# Load libraries
library(tidyverse)
library(phyloseq)
library(decontam)
library(pals)
library(ggpubr)
library(rstatix)
library(vegan)
library(reshape2)
library(DAtest)
library(ape)
library(kableExtra)
library(plotly)
library(magick)
library(forcats)
saveRDS(params, "R_objects/comp_params.RDS")2.0.1 SCRIPTS
These functions are used to merge ASV that are not one of the most abundant, or below a defined threshold, into “Other”
# This function merges anything not in the top (10 by default) most abundant ASVs
merge_less_than_top <- function(pobject, top=10){
# transform to sample counts to relative values
transformed <- transform_sample_counts(pobject, function(x) x/sum(x))
# Orientate and export otu table
if (taxa_are_rows(transformed)) otu.table <- as.data.frame(otu_table(transformed)) else otu.table <- as.data.frame(t(otu_table(transformed)))
# sort otu table by decreasing abundance
otu.sort <- otu.table[order(rowMeans(otu.table), decreasing = TRUE),]
# Create list of ASVs to merge
otu.list <- row.names(otu.sort[(top+1):nrow(otu.sort),])
if(any("Others" %in% rownames(otu.table))) otu.list <- c(otu.list,"Others")
# perform the merger with the original sample counts
merged <- merge_taxa(pobject, otu.list, 1)
# change the taxa name
taxa_names(merged)[taxa_names(merged) %in% otu.list[1]] <- "Others"
# change taxa to "Other" for all levels not being NAs
for (i in 1:ncol(tax_table(merged))){
if (sum(!is.na(tax_table(merged)[,i]))){
tax_table(merged)["Others",i] <- "Others"
}
}
return(merged)
}
# This function merges ASVs that are below a defined ratio, with the default being 0.001 (0.1%)
merge_low_abundance <- function(pobject, threshold=0.001){
# transform to sample counts to relative values
transformed <- transform_sample_counts(pobject, function(x) x/sum(x))
# Orientate and export otu table
if (taxa_are_rows(transformed)) otu.table <- as.data.frame(otu_table(transformed)) else otu.table <- as.data.frame(t(otu_table(transformed)))
# Create list of ASVs to merge
otu.list <- row.names(otu.table[rowMeans(otu.table) < threshold,])
if(any("Others" %in% rownames(otu.table))) otu.list <- c(otu.list,"Others")
# perform the merger with the original sample counts
merged <- merge_taxa(pobject, otu.list, 1)
# change the taxa name
taxa_names(merged)[taxa_names(merged) %in% otu.list[1]] <- "Others"
# change taxa to "Other" for all levels not being NAs
for (i in 1:ncol(tax_table(merged))){
if (sum(!is.na(tax_table(merged)[,i]))){
tax_table(merged)["Others",i] <- "Others"
}
}
return(merged)
}
merge_prevalence <- function(pobject, variable, min.samples){
# transform to sample counts to relative values
if (taxa_are_rows(transformed)) otu.table <- as.data.frame(otu_table(transformed)) else otu.table <- as.data.frame(t(otu_table(transformed)))
# make binary
otu.table[otu.table != 0] <- 1
# extract metadata
dat <- data.frame(sample_data(pobject))
# set groups
vgroup <- unique(dat[,variable])
# create count table
counts <- data.frame(row.names = row.names(otu.table))
for (i in seq(length(vgroup))) counts[,vgroup[i]] <- rowSums(otu.table[,dat[,variable]==vgroup[i]])
counts$keep <- ifelse(rowSums(counts) > min.sample*length(vgroup), TRUE, FALSE)
# Create list of ASVs to merge
otu.list <- row.names(counts[!counts$keep,])
if(any("Others" %in% rownames(otu.table))) otu.list <- c(otu.list,"Others")
# perform the merger with the original sample counts
merged <- merge_taxa(pobject, otu.list, 1)
# change the taxa name
taxa_names(merged)[taxa_names(merged) %in% otu.list[1]] <- "Others"
# change taxa to "Other" for all levels not being NAs
for (i in 1:ncol(tax_table(merged))){
if (sum(!is.na(tax_table(merged)[,i]))){
tax_table(merged)["Others",i] <- "Others"
}
}
return(merged)
}
# save functions
save(merge_less_than_top, merge_low_abundance, file = "scripts/merge_abundance.Rdata")
# clear the environment and release memory
rm(list = ls(all.names = TRUE)) #will clear all objects includes hidden objects.
invisible(gc()) #free up memory and report the memory usage.3 COMPOSITIONAL DIFFERENCES
The last part of the initial analyses is to compare the composition of the samples and test the differential abundance of individual ASVs or taxa.
3.1 AGGLOMERATE DATA - All data
Depending on the project and data it might be relevant to agglomerate data at species or higher taxonomic level for this type of analysis.
params <- readRDS("R_objects/comp_params.RDS")
# load
load(params$input)
# Remove day 20
phy <- subset_samples(physeq = phy, day %in% c("d0","d08","d12","d16","d21"))
# agglomerate at species and genus level
phy.sp <- tax_glom(phy, taxrank = "Species")
phy.ge <- tax_glom(phy, taxrank = "Genus")
phy.fa <- tax_glom(phy, taxrank = "Family")
phy.or <- tax_glom(phy, taxrank = "Order")
phy.cl <- tax_glom(phy, taxrank = "Class")
phy.ph <- tax_glom(phy, taxrank = "Phylum")
# save agglomerated phyloseq objects
save(phy.sp, phy.ge, phy.fa, phy.or, phy.cl, phy.ph, file = "R_objects/Agglomerated_all.RData")
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())3.2 AGGLOMERATE DATA - ITERATIONS
The following section agglomerates the data on taxonomic levels based on subsets of Material (Feces, Ileum, or Cecum), Day of sampling (Day 0, 8, 12, 16, 20, and 21), Feed type (LF = Low fibre, HF = High fibre), and Treatment (CTRL = Control, PFOS = PFOS). Agglomerations are saved in files with a specific naming convention for easy calling in further analysis.
params <- readRDS("R_objects/comp_params.RDS")
# load
load(params$input)
# Remove day 20
phy <- subset_samples(physeq = phy, day %in% c("d0","d08","d12","d16","d21"))
str.agg <- c("Feces","Feces_LF","Feces_HF","Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF","Feces_CTRL","Feces_PFOS","Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d21","Feces_d0d08","Feces_d08d21")
for (SUBNAME in str.agg) {
# Determine material parameter
if (grepl("Feces",SUBNAME) == TRUE) {
MTRL <- "Feces"
mn <- "MTRL"
} else if (grepl("Cecum", SUBNAME) == TRUE) {
MTRL <- "Cecum"
mn <- "MTRL"
} else if (grepl("Ileum", SUBNAME) == TRUE) {
MTRL <- "Ileum"
mn <- "MTRL"
}
# Determine parameters for differential abundance on specific days
if (grepl("_d0d08", SUBNAME) ==TRUE) {
DAY <- c("d0","d08")
dn <- "day0+8"
} else if (grepl("_d08",SUBNAME) == TRUE) {
DAY <- c("d08")
dn <- "day8"
} else if (grepl("_d08d21",SUBNAME) == TRUE) {
DAY <- c("d08","d21")
dn <- "day8+21"
} else if (grepl("_d0",SUBNAME) == TRUE) {
DAY <- "d0"
dn <- "day0"
} else if (grepl("_d12",SUBNAME) == TRUE) {
DAY <- "d12"
dn <- "day12"
} else if (grepl("_d16",SUBNAME) == TRUE) {
DAY <- "d16"
dn <- "day16"
} else if (grepl("_d21",SUBNAME) == TRUE) {
DAY <- "d21"
dn <- "day21"
} else {
dn <- "all_days"
}
# Determine parameters for differential abundance in feed and treatment groups
if (grepl(paste0(MTRL,"_CTRL"),SUBNAME) == TRUE) {
TREAT <- "CTRL"
FEED <- c("LF","HF")
} else if (grepl(paste0(MTRL,"_PFOS"),SUBNAME) == TRUE) {
TREAT <- "PFOS"
FEED <- c("LF","HF")
} else if (grepl("_LF_CTRL",SUBNAME) == TRUE) {
TREAT <- "CTRL"
FEED <- "LF"
} else if (grepl("_LF_PFOS",SUBNAME) == TRUE) {
TREAT <- "PFOS"
FEED <- "LF"
} else if (grepl("_HF_CTRL",SUBNAME) == TRUE) {
TREAT <- "CTRL"
FEED <- "HF"
} else if (grepl("_HF_PFOS",SUBNAME) == TRUE) {
TREAT <- "PFOS"
FEED <- "HF"
} else if (grepl(paste0(MTRL,"_LF"),SUBNAME) == TRUE) {
TREAT <- c("CTRL","PFOS")
FEED <- "LF"
} else if (grepl(paste0(MTRL,"_HF"),SUBNAME) == TRUE) {
TREAT <- c("CTRL","PFOS")
FEED <- "HF"
} else if (dn == "all_days") {
TREAT <- c("CTRL","PFOS")
FEED <- c("LF","HF")
} else if (exists("dn") == TRUE) {
} else {
print("Something went wrong...")
}
#Subset data
if (exists("DAY") == TRUE) {
phy.sub <- subset_samples(phy, material == MTRL & day %in% DAY)
} else if (exists("DAY") == FALSE) {
phy.sub <- subset_samples(phy, material == MTRL & treatment %in% TREAT & feed %in% FEED)
}
# Agglomerate at species and genus level
phy.sp <- tax_glom(phy.sub, taxrank = "Species")
phy.ge <- tax_glom(phy.sub, taxrank = "Genus")
phy.fa <- tax_glom(phy.sub, taxrank = "Family")
phy.or <- tax_glom(phy.sub, taxrank = "Order")
phy.cl <- tax_glom(phy.sub, taxrank = "Class")
phy.ph <- tax_glom(phy.sub, taxrank = "Phylum")
# save agglomerated phyloseq objects
save(phy.sp, phy.ge, phy.fa, phy.or, phy.cl, phy.ph, file = paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
print(paste0("Processed ",SUBNAME," correctly!"))
suppressWarnings(rm(MTRL,mn,DAY,dn,TREAT,FEED,phy.sp, phy.ge, phy.fa, phy.or, phy.cl, phy.ph,phy.sub), classes = "warning")
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4 DIFFERENTIAL ABUNDANCE
Differential abundance is tested here based on significant differences in relative abundance in specific taxonomic entries based on groupings of feed (HF and LF) and treatment (CTRL and PFOS).
There are many different methods that can be used to calculate differential abundance, all with their own advantages and disadvantages. Which method to use depends on the data and therefore I will be using DAtest [@DAtest] as described by Russel et al. (2018).
4.1 FECES
In the following we take a look at differential abundance in feces samples, dividing the analyses into Days and type of test: - Test of PFOS effect on taxa abundance (calling SUBNAME = _LF & _HF) - Test of FEED effect on taxa abundance (calling SUBNAME = _CTRL & _PFOS)
We test all on three taxonomic levels: - Species - Genus - Family
4.1.1 DAY 0
Day 0 samples should have no taxonomic differences between Treatment groups while a difference is expected in Feed. Therefore we test only these two variables on the entire dataset (cross analysis should not be necessary).
4.1.1.1 FEED
Testing differential abundance between feeding groups LF = Low fibre, HF = High fibre.
4.1.1.1.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
PDI <- "feed" #c("treatment","feed")
SUBNAME <- "Feces_d0"
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.sp <- subset_samples(phy.sp, material == MTRL)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Test best method
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)
# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",PDI,"_",LVL,".pdf"))
p.fil
dev.off()Result: TTA (t-test)
RUN DAtest
Having now identified t-test - ALR (tta) to analyse the two groups as the optimal test we will use it to calculate differential abundance. This test is also used for remaining analyses in this section.
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",PDI,"_",LVL,".csv"))
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Calculate pseudocount
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
# Create plot
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed))) +
geom_boxplot() +
scale_y_log10() +
coord_flip() +
scale_fill_manual(name = "Feed", values = params$COLFEED, breaks = c("HF","LF")) +
facet_grid(Phylum ~ . , scales = "free_y", space = "free") +
theme_bw() +
labs(fill = "Feed")
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY))
p.dT
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 250, height = 250, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 250, height = 250, dpi = 300))
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.1.1.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",PDI,"_",LVL,".csv"))
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COLFEED, breaks = c("LF","HF")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw() + labs(fill = "Feed")
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY))
p.dT
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 150, height = 250, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 150, height = 250, dpi = 300))
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.1.1.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttr(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",PDI,"_",LVL,".csv"))
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COLFEED, breaks = c("LF","HF")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw() + labs(fill = "Feed")
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY))
p.dT
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 150, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 150, height = 200, dpi = 300))
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.1.2 TREATMENT in HF
4.1.1.2.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.sp <- subset_samples(phy.sp, material == MTRL)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.1.2.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.1.2.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.1.3 TREATMENT in LF
4.1.1.3.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.sp <- subset_samples(phy.sp, material == MTRL)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.1.3.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.1.3.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d0" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "treatment" #c("treatment","feed")
SUBNAME <- "Feces_d0" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.1.4 CONCLUSIONS
Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 0.
4.1.2 DAY 8
Samples at day 8 are taken 24 hours after last gavage of corn oil (+/- PFOS) and prior to dissection of half the animals.
4.1.2.1 Treatment in LF
Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.
4.1.2.1.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Test best method
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)
# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".pdf"))
p.fil
dev.off()Result: TTA (t-test)
RUN DAtest
Having now identified t-test - ALR (tta) to analyse the two groups as the optimal test we will use it to calculate differential abundance. This test is also used for remaining analyses in this section.
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.1.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.1.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.2 Treatment in HF
Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in HF.
4.1.2.2.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.2.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.2.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.3 Feed in CTRL
Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the control treatment group (CTRL).
4.1.2.3.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.3.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.3.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.4 Feed in PFOS
Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the PFOS treatment group (PFOS).
4.1.2.4.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.4.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.4.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d08" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d08" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.2.5 CONCLUSIONS
Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 8.
4.1.3 DAY 12
4.1.3.1 Treatment in LF
Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.
4.1.3.1.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Test best method
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)
# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".pdf"))
p.fil
dev.off()
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())Result: TTA (t-test)
Having now identified t-test - ALR (tta) to analyse the two groups as the optimal test we will use it to calculate differential abundance. This test is also used for remaining analyses in this section.
4.1.3.1.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttr(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.1.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.2 Treatment in HF
Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in HF. ##### SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.2.1 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.2.2 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.3 Feed in CTRL
Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the control treatment group (CTRL).
4.1.3.3.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.3.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d21","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.3.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttt(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.4 Feed in PFOS
Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the PFOS treatment group (PFOS).
4.1.3.4.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.4.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.4.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d12" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d12" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.3.5 CONCLUSIONS
Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 12.
4.1.4 DAY 16
4.1.4.1 Treatment in LF
Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.
4.1.4.1.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Test best method
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)
# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".pdf"))
p.fil
dev.off()
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.1.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.1.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.2 Treatment in HF
Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in HF. ##### SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.2.1 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.2.2 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.3 Feed in CTRL
Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the control treatment group (CTRL).
4.1.4.3.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.3.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d21","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.3.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttt(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.4 Feed in PFOS
Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the PFOS treatment group (PFOS).
4.1.4.4.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.4.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.4.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d16" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d16" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ltt(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.4.5 CONCLUSIONS
Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 16.
4.1.5 DAY 21
Samples at day 8 are taken 24 hours after last gavage of corn oil (+/- PFOS) and prior to dissection of half the animals.
4.1.5.1 Treatment in LF
Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.
4.1.5.1.1 SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Test best method
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)
# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".pdf"))
p.fil
dev.off()RUN DAtest
Having now identified t-test - CLR (ttc) as the optimal test I will use it to calculate differential abundance
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.1.2 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttr(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.1.3 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "LF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("LF_CTRL","LF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.2 Treatment in HF
Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in HF. ##### SPECIES (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.2.1 GENUS (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.2.2 FAMILY (NO SIGNIFICANCE)
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
FEED <- "HF" #c("LF","HF")
#TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.tta(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",FEED,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("Control","PFOS"), breaks = c("HF_CTRL","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",FEED))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",FEED,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.3 Feed in CTRL
Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the control treatment group (CTRL).
4.1.5.3.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.3.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d21","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.3.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "CTRL" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttt(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_CTRL","HF_CTRL")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.4 Feed in PFOS
Here we investigate the differential abundance driven by feed (No fibre vs. Fibre) in the PFOS treatment group (PFOS).
4.1.5.4.1 SPECIES
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Species" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
if (exists("FEED") == TRUE) {
phy.sp <- subset_samples(phy.sp, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.sp <- subset_samples(phy.sp, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.sp <- subset_samples(phy.sp, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.sp, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Species, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.4.2 GENUS
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.ge <- subset_samples(phy.ge, material == MTRL)
if (exists("FEED") == TRUE) {
phy.ge <- subset_samples(phy.ge, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.ge <- subset_samples(phy.ge, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.ge <- subset_samples(phy.ge, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.ttc(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Genus, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.4.3 FAMILY
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family" #c("Species","Genus","Family")
MTRL <- "Feces" #c("Feces","Ileum","Cecum")
DAY <- "d21" #c("d0","d08","d12","d16","d20","d21")
#FEED <- "LF" #c("LF","HF")
TREAT <- "PFOS" #c("CTRL","PFOS")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Feces_d21" #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Filter samples (if needed)
# phy.fa <- subset_samples(phy.fa, material == MTRL)
if (exists("FEED") == TRUE) {
phy.fa <- subset_samples(phy.fa, feed == FEED)
print(paste0("Data contains type ",FEED," feed."))
} else {
print("Data contains both feed LF and HF.")
}
if (exists("DAY") == TRUE) {
phy.fa <- subset_samples(phy.fa, day %in% DAY)
print(paste0("Data contains samples from day ",DAY,"."))
} else {
print("Data contains all days.")
}
if (exists("TREAT") == TRUE) {
phy.fa <- subset_samples(phy.fa, treatment == TREAT)
print(paste0("Data contains only ",TREAT," treatment groups."))
} else {
print("Data contains all treatment groups.")
}
# Filter data
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
# Test best method
filt.test <- testDA(filt, predictor = PDI, effectSize = 10)
# Evaluate the plot and summary table
sum.fil <- summary(filt.test)
write.csv(sum.fil, file = paste0("plots/differential_abundance/",SUBNAME,"/filter_summary_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
p.fil <- plot(filt.test)
p.fil
pdf(paste0("plots/differential_abundance/",SUBNAME,"/filter_plot_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".pdf"))
p.fil
dev.off()RUN DAtest
Having now identified LIMMA - CLR (lic) as the optimal test I will use it to calculate differential abundance
# Run the selected analysis
filt.DA <- DA.lic(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
write.csv(filt.p5,paste0("plots/differential_abundance/",SUBNAME,"/stats_",MTRL,"_",DAY,"_",TREAT,"_",PDI,"_",LVL,".csv"))
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
p.dif <- ggplot(DAm, aes(x = Family, y = Abundance+pseudocount, fill = fct_rev(feed_treat))) + geom_boxplot() + scale_y_log10() + coord_flip() + scale_fill_manual(values = params$COL1, name = "Feed", labels = c("HF","LF"), breaks = c("LF_PFOS","HF_PFOS")) + facet_grid(Phylum ~ . , scales = "free_y", space = "free") + theme_bw()
p.dT <- p.dif + ggtitle(paste0("Differential abundance on ",LVL," level for ",MTRL," ",DAY," ",TREAT))
print(p.dT)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".pdf"), p.dT, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",TREAT,"_",PDI,"_",LVL,".png"), p.dT, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
if (exists("FEED") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for feed ",FEED,"."))
} else if (exists("TREAT") == TRUE) {
message(paste0("No significant results when testing ",PDI," in ",SUBNAME," subset for treatment ",TREAT,"."))
}
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())4.1.5.5 CONCLUSIONS
Significant differential abundances were observed between diets but not between treatment groups within each feed group on Day 21.
5 ILEUM & CAECUM
Here we take a look at the differential abundance driven by PFOS treatment (CTRL vs. PFOS) in LF.
5.0.0.0.1 Check for PFOS-driven DA
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Family"
LVLS <- c("Species","Genus","Family")
MTRL <- "Ileum_LF" #c("Feces","Ileum","Cecum")
PDI <- "feed_treat" #c("treatment","feed")
SUBNAME <- "Ileum_LF"
SUBNAMES <- c("Ileum_HF","Ileum_LF","Cecum_HF","Cecum_LF") #c("Feces","Feces_LF","Feces_HF","Feces_CTRL","Feces_PFOS","Feces_LF_CTRL","Feces_LF_PFOS","Feces_HF_CTRL","Feces_HF_PFOS", "Feces_d0","Feces_d08","Feces_d12","Feces_d16","Feces_d20","Feces_d21","Feces_d0d08","Feces_d08d21", "Ileum","Ileum_LF","Ileum_HF","Cecum","Cecum_LF","Cecum_HF")
for (SUBNAME in SUBNAMES) {
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
for (LVL in LVLS) {
if (LVL == "Species") {
phy.used <- phy.sp
} else if (LVL == "Genus") {
phy.used <- phy.ge
} else if (LVL == "Family") {
phy.used <- phy.fa
}
# Filter data
filt <- preDA(data = phy.used, min.reads = 20, min.samples = 4)
# Run the selected analysis
filt.DA <- DA.kru(filt, predictor = PDI)
# Evaluate the plot and summary table
table(filt.DA$pval.adj < 0.05)
filt.p5 <- filt.DA[filt.DA$pval.adj < 0.05,]
if (any(filt.p5$pval.adj > 0)) {
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
DA.sig <- prune_taxa(filt.DA$Feature[filt.DA$pval.adj < 0.05], x = filt.ra)
# melt the data
DAm <- psmelt(DA.sig)
# Create plot
pseudocount <- min(DAm$Abundance[DAm$Abundance != 0])
# Statistics
stat.in <- DAm %>%
group_by(.data[[LVL]], feed, day) %>%
wilcox_test(as.formula("Abundance ~ feed_treat")) %>%
adjust_pvalue(method = "BH") %>%
add_significance("p.adj") %>%
add_xy_position(x = "feed", dodge = 0.8) %>%
p_format("p.adj", accuracy = 0.0001, trailing.zero = TRUE, new.col = TRUE)
stat.in$y.position <- log10(stat.in$y.position)
stat.in
stat.out <- DAm %>%
group_by(.data[[LVL]], day) %>%
wilcox_test(as.formula("Abundance ~ feed")) %>%
adjust_pvalue(method = "BH") %>%
add_significance("p.adj") %>%
add_xy_position(x = "feed", dodge = 0.8) %>%
p_format("p.adj", accuracy = 0.0001, trailing.zero = TRUE, new.col = TRUE)
stat.out$y.position <- log10(stat.out$y.position)
stat.out
p <- ggplot(DAm, aes(x = feed, y = Abundance+pseudocount)) +
geom_boxplot(aes(fill = fct_rev(feed_treat))) +
scale_y_log10() +
# coord_flip() +
scale_fill_manual(values = params$COL1, name = "Treatment", labels = c("HF_CTRL" ="HF-CTRL","HF_PFOS" = "HF-PFOS","LF_CTRL" = "LF-CTRL","LF_PFOS" = "LF_PFOS"), breaks = c("HF_CTRL","HF_PFOS","LF_CTRL","LF_PFOS")) +
facet_grid(day ~ .data[[LVL]] , scales = "free_y", space = "free", labeller = labeller(day = c("d08" = "Day 8","d21" = "Day 21"))) +
theme_pubr(border = TRUE)
p
p.stat <- p + stat_pvalue_manual(stat.in, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, color = "red") +
stat_pvalue_manual(stat.out, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE)
p.stat
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".pdf"), p.stat, device = "pdf", units = "mm", width = 350, height = 200, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",PDI,"_",LVL,".png"), p.stat, device = "png", units = "mm", width = 350, height = 200, dpi = 300))
} else {
print(paste0("No significant results when testing ",PDI," in ",SUBNAME," at ",LVL,"."))
}
}
}## 91 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_HF at Species."
## 65 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_HF at Genus."
## 25 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_HF at Family."
## 105 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_LF at Species."
## 76 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_LF at Genus."
## 27 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Ileum_LF at Family."
## 59 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_HF at Species."
## 39 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_HF at Genus."
## 19 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_HF at Family."
## 57 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_LF at Species."
## 41 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_LF at Genus."
## 19 features grouped as 'Others' in the output
## [1] "No significant results when testing feed_treat in Cecum_LF at Family."
5.1 Conclusion
No significant differences observed in caecum and ileum.
6 ENTEROBACTERIACEAE > GENERA
6.1 ABUNDANCE + PSEUDOCOUNT (Figure 7)
Significant difference in abundance was found for unclassified genera “Family_Enterobacteriaceae” above. Here we investigate and plot all genera present on day 8 in the family “Enterobacteriaceae”.
# Collected figures for Families showing significant differences
params <- readRDS("R_objects/comp_params.RDS")
LVL <- "Genus"
LVLS <- c("Genus","Family")
SUBNAME <- "Feces"
PDI <- "feed_treat"
TX <- "Enterobacteriaceae"
# Create sub-folder
if (!file.exists(paste0("plots/differential_abundance/",SUBNAME))) dir.create(file.path(getwd(), paste0("plots/differential_abundance/",SUBNAME)))
# Load agglomorated subset
load(paste0("R_objects/Agglomerated_",SUBNAME,".RData"))
for (LVL in LVLS) {
# Filter data
if (LVL == "Genus") {
filt <- preDA(data = phy.ge, min.reads = 20, min.samples = 4)
} else if (LVL == "Family") {
filt <- preDA(data = phy.fa, min.reads = 20, min.samples = 4)
}
# Run tha selected analysis Wilcoxon
filt.DA <- DA.wil(filt, predictor = "feed")
# Create a subset of the samples
filt.ra <- transform_sample_counts(filt, function(x) x/sum(x)*100)
filt.t <- subset_taxa(filt.ra, Family == TX)
# Melt the data
t.melt <- psmelt(filt.t)
# Create pseudocounts
pseudocount <- min(t.melt$Abundance[t.melt$Abundance != 0])
# Sort samples to phylogenetic levels (alphabetically)
if (LVL == "Genus") {
t.sorted <- t.melt %>% select(Phylum:Genus) %>% arrange(Phylum, Class, Order, Family, Genus)
genus.sorted <- as.character(unique(t.sorted$Genus))
t.melt$genus_new <- factor(t.melt$Genus, levels = genus.sorted)
levels(t.melt$genus_new) %>% head
} else if (LVL == "Family") {
t.sorted <- t.melt %>% select(Phylum:Family) %>% arrange(Phylum, Class, Order, Family)
fam.sorted <- as.character(unique(t.sorted$Family))
t.melt$fam_new <- factor(t.melt$Family, levels = fam.sorted)
levels(t.melt$genus_new) %>% head
}
# Statistics
stat.in <- t.melt %>%
group_by(.data[[LVL]], feed, day) %>%
wilcox_test(as.formula("Abundance ~ feed_treat")) %>%
adjust_pvalue(method = "BH") %>%
add_significance("p.adj") %>%
add_xy_position(x = "feed", dodge = 0.8) %>%
p_format("p.adj", accuracy = 0.0001, trailing.zero = TRUE, new.col = TRUE)
stat.in$y.position <- log10(stat.in$y.position)
stat.in
stat.out <- t.melt %>%
group_by(.data[[LVL]], day) %>%
wilcox_test(as.formula("Abundance ~ feed")) %>%
adjust_pvalue(method = "BH") %>%
add_significance("p.adj") %>%
add_xy_position(x = "feed", dodge = 0.8) %>%
p_format("p.adj", accuracy = 0.0001, trailing.zero = TRUE, new.col = TRUE)
stat.out$y.position <- log10(stat.out$y.position)
stat.out
# Plot data and save as output formats
p <- ggplot(t.melt, aes(feed, Abundance+pseudocount)) +
geom_boxplot(aes(fill = feed_treat), color = "black") +
scale_y_log10(name = "Abundance+pseudocount (log10)", expand = expansion(mult = c(0, 0.075)), labels =function(x) paste0(x, "%")) +
scale_fill_manual(name = "Group", values = params$COL1,
breaks = c("HF_CTRL","HF_PFOS","LF_CTRL","LF_PFOS"),
labels = c("HF-CTRL","HF-PFOS","LF-CTRL","LF-PFOS")) +
facet_grid(.data[[LVL]] ~ day, space = "free",
labeller = labeller(day = c("d0" = "Day 0","d08" = "Day 8","d12" = "Day 12","d16" = "Day 16","d21" = "Day 21"))) +
theme_pubr(border = TRUE) +
guides(color = "none") +
theme(axis.title.x = element_blank())
p
if (SUBNAME == "Feces") {
if (LVL == "Genus") {
p.stat <- p + stat_pvalue_manual(stat.in, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, color = "red") +
stat_pvalue_manual(stat.out, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, y.position = c(1.1,1.4,1.2,1.3,1.3,0.01,0.01))
} else {
p.stat <- p + stat_pvalue_manual(stat.in, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, color = "red") +
stat_pvalue_manual(stat.out, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE)
}
} else if (SUBNAME %in% c("Ileum","Cecum")) {
p.stat <- p + #stat_pvalue_manual(stat.in, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE, color = "red") +
stat_pvalue_manual(stat.out, label = "p.adj.signif", tip.length = 0, hide.ns = TRUE)
}
print(p.stat)
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",LVL,"_",TX,".pdf"), p.stat, device = "pdf", units = "mm", width = 200, height = 100, dpi = 300))
suppressMessages(ggsave(paste0("plots/differential_abundance/",SUBNAME,"/diffplot_",SUBNAME,"_",LVL,"_",TX,".png"), p.stat, device = "png", units = "mm", width = 200, height = 100, dpi = 300))
save(stat.in, stat.out, p.stat, file = paste0("plots/differential_abundance/",SUBNAME,"/plot_stat_",SUBNAME,"_",LVL,".Rdata"))
}
# clear the environment and release memory
rm(list = ls(all.names = TRUE))
invisible(gc())
7 SETTINGS
Overview of the packages that was used for this analysis
7.1 PACKAGES
The analysis was run in R version getRversion() using
the following packages:
pack <- data.frame(Package = (.packages()))
for (i in seq(nrow(pack))) pack$Version[i] <- as.character(packageVersion(pack$Package[i]))
kbl(pack[order(pack$Package),], row.names = F) %>% kable_classic(lightable_options = "striped") | Package | Version |
|---|---|
| ape | 5.8 |
| base | 4.3.1 |
| datasets | 4.3.1 |
| DAtest | 2.8.0 |
| decontam | 1.22.0 |
| dplyr | 1.1.4 |
| forcats | 1.0.0 |
| ggplot2 | 3.5.1 |
| ggpubr | 0.6.0 |
| graphics | 4.3.1 |
| grDevices | 4.3.1 |
| kableExtra | 1.4.0 |
| lattice | 0.21.8 |
| lubridate | 1.9.3 |
| magick | 2.8.4 |
| methods | 4.3.1 |
| pals | 1.9 |
| permute | 0.9.7 |
| phyloseq | 1.46.0 |
| plotly | 4.10.4 |
| purrr | 1.0.2 |
| readr | 2.1.5 |
| reshape2 | 1.4.4 |
| rstatix | 0.7.2 |
| stats | 4.3.1 |
| stringr | 1.5.1 |
| tibble | 3.2.1 |
| tidyr | 1.3.1 |
| tidyverse | 2.0.0 |
| utils | 4.3.1 |
| vegan | 2.6.6.1 |
7.2 PARAMETERS
params <- readRDS("R_objects/comp_params.RDS")
tmp <- unlist(params)
dat <- data.frame(Parameter = names(tmp), Value = unname(tmp))
kbl(dat, row.names = F) %>% kable_classic(lightable_options = "striped")| Parameter | Value |
|---|---|
| input | R_objects/Phyloseq.Rdata |
| meta | input/metadata_fibrex.csv |
| batch | Run |
| indeces | Observed|Shannon|FaithPD|Chao1 |
| COL0.CTRL | #eeeeee |
| COL0.PFOS | #666666 |
| COL1.LF_CTRL | #a5cee3 |
| COL1.LF_PFOS | #1778b6 |
| COL1.HF_CTRL | #b4d88a |
| COL1.HF_PFOS | #30a148 |
| COL2.LF_CTRL_d8 | #a5cee3 |
| COL2.LF_CTRL_d21 | #a5cee3 |
| COL2.LF_PFOS_d8 | #1778b6 |
| COL2.LF_PFOS_d21 | #1778b6 |
| COL2.HF_CTRL_d8 | #b4d88a |
| COL2.HF_CTRL_d21 | #b4d88a |
| COL2.HF_PFOS_d8 | #30a148 |
| COL2.HF_PFOS_d21 | #30a148 |
| COLFEED.LF | #1778b6 |
| COLFEED.HF | #30a148 |